# -*- coding: UTF-8 -*-

'''
Created on 2014-07-05

@author: areshero
'''

# 
# class owlHandler:
#     def __init__(self):
#         
#     

from home.models import Ontology
from django.db import connection
#from xml.etree import ElementTree
#from  xml.dom import  minidom
import MySQLdb
import traceback
import rdflib
from rdflib import Namespace,Graph, Literal, BNode, Namespace, RDF, URIRef

import datetime

'''
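The following three functions process tag metadata. For now only the tag's name and full_id need to be saved; the ontology's id has already been stored in the database.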
'''

# Read a TTL file and store the tag information in the database.
def readTTLFile(filename):
    """Execute the cached tag INSERT statement stored in ``./sql.txt``.

    The first line of ``./sql.txt`` is read, executed as one SQL statement
    against the local ``mitagger`` MySQL database, and committed.

    ``filename`` is opened, so a missing TTL file still raises ``IOError``,
    but its contents are not parsed by this function.

    NOTE(review): an earlier version of this function carried a TTL parser
    that rebuilt the INSERT statement and rewrote ``sql.txt``. It was placed
    after an unconditional ``return True`` and therefore never ran; it has
    been removed here rather than kept as unreachable code. Restore it from
    version control if ``sql.txt`` has to be regenerated.

    Returns:
        True once the statement has been executed and committed.

    Raises:
        IOError: if ``filename`` or ``./sql.txt`` cannot be opened.
        MySQLdb.Error: if connecting or executing the statement fails.
    """
    # Kept only for its failure mode: callers get IOError for a missing
    # TTL file exactly as before, even though nothing is read from it.
    with open(filename):
        pass

    with open("./sql.txt") as cachedSqlFile:
        cachedSql = cachedSqlFile.readline()

    conn = MySQLdb.connect(host='127.0.0.1', user='root', passwd='',
                           connect_timeout=100000000,
                           db='mitagger')
    try:
        cursor = conn.cursor()
        try:
            print("1")
            cursor.execute(cachedSql)
        finally:
            cursor.close()
        conn.commit()
    finally:
        # Close the connection even when execute() or commit() raises.
        conn.close()
    return True
        

    
#readTTLFile("./RXNORM.ttl")
# Read an RDF file and store the tag information in the database.
def readRDFFile(filename):
    """Load an RDF/OWL file and insert every labelled class as a tag.

    The file is parsed with rdflib. Every subject of a triple whose object
    is ``owl:Class`` is collected; subjects without a label are ignored.
    For each remaining subject the URI and label are appended as two lines
    to ``SNOMEDCTANATOMY.txt`` and one row
    ``(label, '4', uri, '3')`` is inserted into ``mitagger_tags``.

    Timing information for the load and collection phases is printed to
    stdout.

    Args:
        filename: path of the RDF/OWL file handed to ``rdflib.Graph.load``.

    Returns:
        True when the rows (possibly none) have been inserted and committed.

    Raises:
        MySQLdb.Error: if connecting, inserting or committing fails.
    """
    print("begin")
    startTime = datetime.datetime.now()

    g = rdflib.Graph()
    g.load(filename)

    loadedTime = datetime.datetime.now()
    print("load time:")
    print(loadedTime - startTime)

    URI_CLASS = rdflib.URIRef(u'http://www.w3.org/2002/07/owl#Class')
    # A set removes subjects that are reported more than once.
    subjects = set(g.subjects(object=URI_CLASS))

    collectedTime = datetime.datetime.now()
    print("add subjects time:")
    print(collectedTime - loadedTime)

    rows = []
    dataFile = open("SNOMEDCTANATOMY.txt", 'w')
    try:
        for s in subjects:
            label = g.label(s)
            # Subjects without a label are skipped.
            if len(label) == 0:
                continue
            try:
                # Written in one call so a failure never leaves a URI line
                # without its label line in the dump.
                dataFile.write(u"%s\n%s\n" % (s, label))
            except Exception:
                # Best effort: a tag that cannot be written to the dump
                # (e.g. text the default codec cannot encode) is reported
                # and left out of the insert as well. repr() keeps the
                # report itself from failing on such text.
                print(repr(s))
                print(repr(label))
                continue
            rows.append((unicode(label), '4', unicode(s), '3'))
    finally:
        dataFile.close()

    if not rows:
        # Nothing to insert; an INSERT without value tuples is invalid SQL.
        return True

    # Connect only once the rows are ready so the connection is not held
    # idle while the graph is being parsed.
    conn = MySQLdb.connect(host='127.0.0.1', user='root', passwd='',
                           connect_timeout=100000000,
                           db='mitagger')
    try:
        cursor = conn.cursor()
        try:
            # Parameterized so quotes or backslashes in a label cannot
            # break the statement.
            cursor.executemany(
                "insert into mitagger_tags(name,tag_type_id,fullId,ontology_id) "
                "values (%s,%s,%s,%s)",
                rows)
        finally:
            cursor.close()
        # Without an explicit commit the insert is discarded when the
        # connection closes (MySQLdb does not autocommit by default).
        conn.commit()
    finally:
        conn.close()
    return True

#readRDFFile("./data/snomed_anatomy_1.2.owl")
# Read a txt file and store the tag information in the database.
def _executeTagBatch(cursor, sqlLog, insertSql, rows):
    """Insert *rows* with one parameterized statement and log them to *sqlLog*.

    A failing batch is reported with its traceback and skipped so that the
    remaining batches are still attempted.
    """
    if not rows:
        return
    print("execute sql")
    sqlLog.write(insertSql + "\n")
    for row in rows:
        sqlLog.write(repr(row) + "\n")
    try:
        cursor.executemany(insertSql, rows)
    except MySQLdb.Error:
        traceback.print_exc()


def saveTxtDataFile(filename):
    """Insert tags from a two-lines-per-tag text file into MySQL in batches.

    ``filename`` is read as consecutive line pairs: the first line of a pair
    is the tag's full id, the second its name. Each pair becomes one row
    ``(name, '4', fullId, '3')`` in ``mitagger_tags``. A trailing incomplete
    pair at end of file is ignored.

    Every executed batch is also recorded in ``insert_NCIT.txt`` (the
    statement followed by one parameter tuple per line).

    Args:
        filename: path of the text file to import.

    Returns:
        True after all batches have been attempted and the transaction
        committed. Batches rejected by MySQL are reported on stderr and
        skipped.

    Raises:
        IOError: if ``filename`` or ``insert_NCIT.txt`` cannot be opened.
        MySQLdb.Error: if connecting or committing fails.
    """
    insertSql = ("insert into mitagger_tags(name,tag_type_id,fullId,ontology_id) "
                 "values (%s,%s,%s,%s)")
    batchSize = 1000

    rows = []
    with open(filename) as dataFile:
        while True:
            tagID = dataFile.readline()
            tagName = dataFile.readline()
            # readline() returns '' only at end of file; stopping here keeps
            # an empty or half-read record from being inserted.
            if not tagID or not tagName:
                break
            name = tagName.replace('\n', '')
            # '\#' in the input is unescaped back to a plain '#'.
            fullId = tagID.replace('\n', '').replace('\\#', '#')
            print(name)
            rows.append((name, '4', fullId, '3'))

    conn = MySQLdb.connect(host='127.0.0.1', user='root', passwd='',
                           connect_timeout=100000000,
                           db='mitagger')
    try:
        cursor = conn.cursor()
        try:
            with open("insert_NCIT.txt", 'w') as sqlLog:
                for start in range(0, len(rows), batchSize):
                    _executeTagBatch(cursor, sqlLog, insertSql,
                                     rows[start:start + batchSize])
        finally:
            cursor.close()
        conn.commit()
    finally:
        conn.close()
    return True
    
#saveTxtDataFile("SNOMEDCTANATOMY.txt")

def _iterOBOTerms(oboFile):
    """Yield one dict per ``[Term]`` stanza of the open OBO file *oboFile*.

    Each dict maps a tag name (the text before ``": "``) to its value; the
    last occurrence wins, except ``is_a`` whose values are collected in a
    list. Stanzas other than ``[Term]`` are skipped. Reading stops at end of
    file or at a line consisting of ``areshero``, the end marker older input
    files carry.
    """
    currentTag = None
    for rawLine in oboFile:
        line = rawLine.rstrip('\r\n')
        if line == 'areshero':
            break
        if line.startswith('['):
            # A new stanza header closes the term being collected.
            if currentTag is not None:
                yield currentTag
            currentTag = {'is_a': []} if line == '[Term]' else None
            continue
        if currentTag is None:
            # File header or a non-Term stanza: nothing to collect.
            continue
        itemName, separator, itemValue = line.partition(': ')
        if not separator:
            # Blank line or a line that is not a "tag: value" pair.
            continue
        if itemName == 'is_a':
            currentTag['is_a'].append(itemValue)
        else:
            currentTag[itemName] = itemValue
    if currentTag is not None:
        yield currentTag


def readOBOFile(filename):
    """Insert the ``[Term]`` entries of an OBO file as tags of the PR ontology.

    The ontology row is looked up by the fixed virtual id
    ``http://data.bioontology.org/ontologies/PR``. For every term that has
    both an ``id`` and a ``name`` one row
    ``(name, '4', fullId, ontology id)`` is inserted into ``mitagger_tags``,
    where ``fullId`` is ``http://purl.obolibrary.org/obo/`` followed by the
    term id with ``:`` replaced by ``_``. Each processed term id is printed.

    NOTE(review): no explicit commit is issued on the Django connection;
    whether the rows persist depends on the project's transaction settings
    -- confirm.

    Args:
        filename: path of the OBO file to import.

    Returns:
        True when all collected rows (possibly none) have been inserted.

    Raises:
        IOError: if ``filename`` cannot be opened.
        Ontology.DoesNotExist: if the PR ontology row is missing.
    """
    currentOntology = Ontology.objects.get(virtual_id = "http://data.bioontology.org/ontologies/PR")
    ontologyId = str(currentOntology.id)

    rows = []
    with open(filename) as oboFile:
        for currentTag in _iterOBOTerms(oboFile):
            tagId = currentTag.get('id')
            tagName = currentTag.get('name')
            if tagId is None or tagName is None:
                # A tag needs both fields; report the stanza and move on.
                print("skipping term without id or name: %r" % (currentTag,))
                continue
            print(tagId)
            c_fullId = "http://purl.obolibrary.org/obo/" + tagId.replace(':', '_')
            rows.append((tagName, '4', c_fullId, ontologyId))

    if not rows:
        # An INSERT without value tuples would be invalid SQL.
        return True

    cursor = connection.cursor()
    try:
        # Parameterized so quotes in a term name cannot break the statement.
        cursor.executemany(
            "insert into mitagger_tags(name,tag_type_id,fullId,ontology_id) "
            "values (%s,%s,%s,%s)",
            rows)
    finally:
        cursor.close()

    return True

#readOBOFile("./data/pro_reasoned.obo")
